From 2dcac0c2263ee391c2e8cb700353a167181a51b9 Mon Sep 17 00:00:00 2001 From: "kaf24@scramble.cl.cam.ac.uk" Date: Fri, 12 Mar 2004 13:36:55 +0000 Subject: [PATCH] bitkeeper revision 1.781.1.1 (4051bcf71wSVHdhOOqsU19dm2FSXDQ) physdev.c, xc_physdev.c: new file Many files: First half of Rolf's IO-virtualisation patch. --- .rootkeys | 2 + tools/examples/xc_dom_create.py | 9 + tools/misc/Makefile | 12 +- tools/xc/lib/xc.h | 7 + tools/xc/lib/xc_physdev.c | 27 + tools/xc/py/Xc.c | 43 +- xen/arch/i386/entry.S | 3 +- xen/common/dom0_ops.c | 11 + xen/common/domain.c | 7 + xen/common/physdev.c | 815 ++++++++++++++++++ xen/drivers/char/console.c | 6 +- xen/include/hypervisor-ifs/hypervisor-if.h | 8 +- .../arch/xeno/drivers/console/console.c | 4 +- .../include/asm-xeno/hypervisor.h | 4 +- 14 files changed, 933 insertions(+), 25 deletions(-) create mode 100644 tools/xc/lib/xc_physdev.c create mode 100644 xen/common/physdev.c diff --git a/.rootkeys b/.rootkeys index 3458df6fc6..5862740dea 100644 --- a/.rootkeys +++ b/.rootkeys @@ -79,6 +79,7 @@ 3fbba6db7li3FJiABYtCmuGxOJxEGw tools/xc/lib/xc_linux_save.c 3fbba6db7WnnJr0KFrIFrqNlSKvFYg tools/xc/lib/xc_misc.c 40278d9ctaHVDaEuwhXI3Om2JOjx9w tools/xc/lib/xc_netbsd_build.c +4051bce6CHAsYh8P5t2OHDtRWOP9og tools/xc/lib/xc_physdev.c 3fbba6dctWRWlFJkYb6hdix2X4WMuw tools/xc/lib/xc_private.c 3fbba6dcbVrG2hPzEzwdeV_UC8kydQ tools/xc/lib/xc_private.h 3fbba6dcoGq9hQlksrBUfC2P5F6sGg tools/xc/lib/xc_vbd.c @@ -150,6 +151,7 @@ 3ddb79bdN51qpRC-6bOH-v5hl_AK6A xen/common/network.c 3ddb79bdD4SLmmdMD7yLW5HcUWucXw xen/common/page_alloc.c 3e54c38dkHAev597bPr71-hGzTdocg xen/common/perfc.c +4051bcecFeq4DE70p4zGO5setf47CA xen/common/physdev.c 4006e659i9j-doVxY7DKOGU4XVin1Q xen/common/rbtree.c 3ddb79bdHqdQpATqC0rmUZNbsb6L6A xen/common/resource.c 3e397e6619PgAfBbw2XFbXkewvUWgw xen/common/schedule.c diff --git a/tools/examples/xc_dom_create.py b/tools/examples/xc_dom_create.py index b0fa8ad16a..4af0d9b0da 100755 --- a/tools/examples/xc_dom_create.py +++ 
b/tools/examples/xc_dom_create.py @@ -85,6 +85,7 @@ image=''; ramdisk=''; builder_fn=''; restore=0; state_file='' mem_size=0; domain_name=''; vfr_ipaddr=[]; vbd_expert=0; auto_restart=False; vbd_list = []; cmdline_ip = ''; cmdline_root=''; cmdline_extra='' +pci_device_list = [] ##### Determine location of defautls file ##### @@ -278,6 +279,14 @@ def make_domain(): for ip in vfr_ipaddr: XenoUtil.setup_vfr_rules_for_vif( id, 0, ip ) + # check for physical device access + for (pci_bus, pci_dev, pci_func) in pci_device_list: + if xc.physdev_pci_access_modify( + dom=id, bus=pci_bus, dev=pci_dev, func=pci_func, enable=1 ) < 0: + print "Non-fatal error enabling PCI device access." + else: + print "Enabled PCI access (%d:%d:%d)." % (pci_bus,pci_dev,pci_func) + if xc.domain_start( dom=id ) < 0: print "Error starting domain" xc.domain_destroy ( dom=id ) diff --git a/tools/misc/Makefile b/tools/misc/Makefile index 743f0f4402..59fa4c1c93 100644 --- a/tools/misc/Makefile +++ b/tools/misc/Makefile @@ -16,18 +16,14 @@ all: $(TARGETS) install: all mkdir -p $(prefix)/usr/bin - cp -a $(INSTALL) $(prefix)/usr/bin - chmod 755 $(prefix)/usr/bin/xen-mkdevnodes - chmod 755 $(prefix)/usr/bin/xen_nat_enable - chmod 755 $(prefix)/usr/bin/xen-clone + cp $(INSTALL) $(prefix)/usr/bin + for i in $(INSTALL); do chmod 755 $(prefix)/usr/bin/$$i ; done $(MAKE) -C miniterm install dist: all mkdir -p ../../../install/bin - cp -a $(INSTALL) ../../../install/bin - chmod 755 ../../../install/bin/xen-mkdevnodes - chmod 755 ../../../install/bin/xen_nat_enable - chmod 755 ../../../install/bin/xen-clone + cp $(INSTALL) ../../../install/bin + for i in $(INSTALL); do chmod 755 ../../../install/bin/$$i ; done $(MAKE) -C miniterm dist clean: diff --git a/tools/xc/lib/xc.h b/tools/xc/lib/xc.h index 2f2d26919e..fd1494dc13 100644 --- a/tools/xc/lib/xc.h +++ b/tools/xc/lib/xc.h @@ -178,6 +178,13 @@ int xc_evtchn_status(int xc_handle, int *port2, int *chn_status); +int xc_physdev_pci_access_modify(int xc_handle, + u64
domid, + int bus, + int dev, + int func, + int enable); + int xc_readconsolering(int xc_handle, char *str, unsigned int max_chars, diff --git a/tools/xc/lib/xc_physdev.c b/tools/xc/lib/xc_physdev.c new file mode 100644 index 0000000000..6fa2851734 --- /dev/null +++ b/tools/xc/lib/xc_physdev.c @@ -0,0 +1,27 @@ +/****************************************************************************** + * xc_physdev.c + * + * API for manipulating physical-device access permissions. + * + * Copyright (c) 2004, Rolf Neugebauer (Intel Research Cambridge) + * Copyright (c) 2004, K A Fraser (University of Cambridge) + */ + +int xc_physdev_pci_access_modify(int xc_handle, + u64 domid, + int bus, + int dev, + int func, + int enable) +{ + dom0_op_t op; + + op.cmd = DOM0_PCIDEV_ACCESS; + op.u.pcidev_access.domain = (domid_t)domid; + op.u.pcidev_access.bus = bus; + op.u.pcidev_access.dev = dev; + op.u.pcidev_access.func = func; + op.u.pcidev_access.enable = enable; + + return do_dom0_op(xc_handle, &op); +} diff --git a/tools/xc/py/Xc.c b/tools/xc/py/Xc.c index 7ee540afc5..3515047ee9 100644 --- a/tools/xc/py/Xc.c +++ b/tools/xc/py/Xc.c @@ -892,6 +892,29 @@ static PyObject *pyxc_evtchn_status(PyObject *self, return dict; } +static PyObject *pyxc_physdev_pci_access_modify(PyObject *self, + PyObject *args, + PyObject *kwds) +{ + XcObject *xc = (XcObject *)self; + u64 dom; + int bus, dev, func, enable, ret; + + static char *kwd_list[] = { "dom", "bus", "dev", "func", "enable", NULL }; + + if ( !PyArg_ParseTupleAndKeywords(args, kwds, "Liiii", kwd_list, + &dom, &bus, &dev, &func, &enable) ) + { + DPRINTF("could not parse parameter list."); + return NULL; + } + + ret = xc_physdev_pci_access_modify( + xc->xc_handle, dom, bus, dev, func, enable); + + return PyInt_FromLong(ret); +} + static PyObject *pyxc_readconsolering(PyObject *self, PyObject *args, PyObject *kwds) @@ -924,9 +947,7 @@ static PyObject *pyxc_physinfo(PyObject *self, int xc_ret; xc_physinfo_t info; - xc_ret = 
xc_physinfo(xc->xc_handle, &info); - - if(!xc_ret) + if ( (xc_ret = xc_physinfo(xc->xc_handle, &info)) == 0 ) { ret_obj = Py_BuildValue("{s:i,s:i,s:l,s:l,s:l}", "ht_per_core", info.ht_per_core, @@ -937,7 +958,8 @@ static PyObject *pyxc_physinfo(PyObject *self, } else { - ret_obj = Py_BuildValue(""); /* None */ + Py_INCREF(Py_None); + ret_obj = Py_None; } return ret_obj; @@ -978,7 +1000,7 @@ static PyMethodDef pyxc_methods[] = { (PyCFunction)pyxc_domain_pincpu, METH_VARARGS | METH_KEYWORDS, "\n" "Pin a domain to a specified CPU.\n" - " dom [long]: Identifier of domain to be destroyed.\n" + " dom [long]: Identifier of domain to be pinned.\n" " cpu [int, -1]: CPU to pin to, or -1 to unpin\n\n" "Returns: [int] 0 on success; -1 on error.\n" }, @@ -1195,6 +1217,17 @@ static PyMethodDef pyxc_methods[] = { " dom [long]: Port-id for endpoint at dom1.\n" " port [int]: Port-id for endpoint at dom2.\n" }, + { "physdev_pci_access_modify", + (PyCFunction)pyxc_physdev_pci_access_modify, + METH_VARARGS | METH_KEYWORDS, "\n" + "Allow a domain access to a PCI device\n" + " dom [long]: Identifier of domain to be allowed access.\n" + " bus [int]: PCI bus\n" + " dev [int]: PCI slot\n" + " func [int]: PCI function\n" + " enable [int]: Non-zero means enable access; else disable access\n\n" + "Returns: [int] 0 on success; -1 on error.\n" }, + { "readconsolering", (PyCFunction)pyxc_readconsolering, METH_VARARGS | METH_KEYWORDS, "\n" diff --git a/xen/arch/i386/entry.S b/xen/arch/i386/entry.S index 1ac45dd3de..08a44af997 100644 --- a/xen/arch/i386/entry.S +++ b/xen/arch/i386/entry.S @@ -727,7 +727,8 @@ ENTRY(hypervisor_call_table) .long SYMBOL_NAME(do_set_timer_op) /* 20 */ .long SYMBOL_NAME(do_event_channel_op) .long SYMBOL_NAME(do_xen_version) - .long SYMBOL_NAME(do_serial_io) + .long SYMBOL_NAME(do_console_io) + .long SYMBOL_NAME(do_physdev_op) .rept NR_syscalls-((.-hypervisor_call_table)/4) .long SYMBOL_NAME(do_ni_syscall) .endr diff --git a/xen/common/dom0_ops.c b/xen/common/dom0_ops.c 
index 11e755e65d..e359026371 100644 --- a/xen/common/dom0_ops.c +++ b/xen/common/dom0_ops.c @@ -473,6 +473,17 @@ long do_dom0_op(dom0_op_t *u_dom0_op) } break; + case DOM0_PCIDEV_ACCESS: + { + extern int physdev_pci_access_modify(domid_t, int, int, int, int); + ret = physdev_pci_access_modify(op->u.pcidev_access.domain, + op->u.pcidev_access.bus, + op->u.pcidev_access.dev, + op->u.pcidev_access.func, + op->u.pcidev_access.enable); + } + break; + default: ret = -ENOSYS; diff --git a/xen/common/domain.c b/xen/common/domain.c index d226c52d5f..6f8bdf8002 100644 --- a/xen/common/domain.c +++ b/xen/common/domain.c @@ -74,6 +74,10 @@ struct task_struct *do_createdomain(domid_t dom_id, unsigned int cpu) init_blkdev_info(p); + /* Per-domain PCI-device list. */ + spin_lock_init(&p->pcidev_lock); + INIT_LIST_HEAD(&p->pcidev_list); + write_lock_irqsave(&tasklist_lock, flags); pp = &task_list; /* NB. task_list is maintained in order of dom_id. */ for ( pp = &task_list; *pp != NULL; pp = &(*pp)->next_list ) @@ -835,6 +839,9 @@ int setup_guestos(struct task_struct *p, dom0_createdomain_t *params, } kfree(xd); + /* DOM0 gets access to everything. */ + physdev_init_dom0(p); + set_bit(PF_CONSTRUCTED, &p->flags); new_thread(p, diff --git a/xen/common/physdev.c b/xen/common/physdev.c new file mode 100644 index 0000000000..3055ad2e30 --- /dev/null +++ b/xen/common/physdev.c @@ -0,0 +1,815 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2004 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: phys_dev.c + * Author: Rolf Neugebauer (rolf.neugebauer@intel.com) + * Date: Feb 2004 + * + * Description: allows a domain to access devices on the PCI bus + * + * a guest os may be given access to particular devices on the PCI + * bus. 
to allow the standard PCI device discovery to work it may
+ * also have limited access to devices (bridges) in the PCI device
+ * tree between the device and the PCI root device.
+ *
+ * for each domain a list of PCI devices is maintained, describing the
+ * access mode for the domain.
+ *
+ * guests can figure out the virtualised, or better, partitioned PCI space
+ * through normal pci config register access. Some of the accesses, in
+ * particular write access are faked out. For example the sequence
+ * for detecting the IO regions, which require writes to determine the
+ * size of the region, is faked out by a very simple state machine,
+ * preventing direct writes to the PCI config registers by a guest.
+ *
+ * XXX Some comment on IRQ handling
+ */
+
+
+/*
+ * NOTE(review): the header names of the #include directives below are
+ * missing from this copy of the patch (they look like they were stripped
+ * as markup); restore them from the original commit before applying.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+
+#include
+#include
+
+#define DBG(_x...)
+//#define DBG(_x...) printk(_x)
+
+#define ACC_READ 1
+#define ACC_WRITE 2
+
+/* upper bounds for PCI devices */
+#define PCI_BUSMAX 255
+#define PCI_DEVMAX 31
+#define PCI_FUNCMAX 7
+#define PCI_REGMAX 255
+
+/* bit offsets into state */
+#define ST_BASE_ADDRESS 0 /* bits 0-5: are for base address access */
+#define ST_ROM_ADDRESS 6 /* bit 6: is for rom address access */
+#define ST_IRQ_DELIVERED 7 /* bit 7: waiting for end irq call */
+
+
+/* per-domain descriptor for one PCI device the domain may access */
+typedef struct _phys_dev_st
+{
+    int flags;                       /* flags for access etc */
+    struct pci_dev *dev;             /* the device */
+    struct list_head node;           /* link to the list */
+    struct task_struct *owner;       /* 'owner of this device' */
+    int state;                       /* state for various checks */
+
+    hw_irq_controller *new_handler;  /* replacement handler given to the irq */
+    hw_irq_controller *orig_handler; /* saved old handler */
+
+} phys_dev_t;
+
+
+#define MAX_IRQS 32
+/* an array of device descriptors index by IRQ number */
+static phys_dev_t *irqs[MAX_IRQS];
+
+/*
+ * find a device on the device list of task p.
+ * returns the matching descriptor, or NULL if p has no entry for dev.
+ *
+ * NOTE(review): do_createdomain() in this patch initialises p->pcidev_list
+ * while this file uses p->dev_list throughout -- confirm which field
+ * struct task_struct really declares.
+ */
+static phys_dev_t *find_pdev(struct task_struct *p, struct pci_dev *dev)
+{
+    phys_dev_t *t, *res = NULL;
+    struct list_head *tmp;
+
+    list_for_each(tmp, &p->dev_list)
+    {
+        t = list_entry(tmp, phys_dev_t, node);
+        if ( dev == t->dev )
+        {
+            res = t;
+            break;
+        }
+    }
+    return res;
+}
+
+/*
+ * add the device to the list of devices task p can access.
+ * acc is ACC_READ or ACC_WRITE. an allocation failure is only logged.
+ */
+static void add_dev_to_task(struct task_struct *p,
+                            struct pci_dev *dev, int acc)
+{
+    phys_dev_t *pdev;
+
+    if ( (pdev = find_pdev(p, dev)) )
+    {
+        /*
+         * device already on list: update access, but never downgrade an
+         * earlier write grant when the device is re-added read-only
+         * (e.g. as the root device or a bridge on the path to it).
+         */
+        if ( (acc == ACC_WRITE) || (pdev->flags != ACC_WRITE) )
+            pdev->flags = acc;
+        if ( acc == ACC_WRITE )
+            pdev->owner = p;
+        return;
+    }
+
+    /* add device */
+    if ( !(pdev = kmalloc(sizeof(phys_dev_t), GFP_KERNEL)) )
+    {
+        printk("error allocating pdev structure\n");
+        return;
+    }
+
+    /* initialise every field so nothing is read uninitialised later */
+    pdev->dev = dev;
+    pdev->flags = acc;
+    pdev->state = 0;
+    pdev->owner = (acc == ACC_WRITE) ? p : NULL;
+    pdev->new_handler = NULL;
+    pdev->orig_handler = NULL;
+    list_add(&pdev->node, &p->dev_list);
+}
+
+/*
+ * physdev_pci_access_modify:
+ * Allow/disallow access to a specific PCI device. Also allow read access to
+ * PCI devices from the device to the root of the device tree. If the given
+ * device is a bridge, then the domain should get access to all the devices
+ * attached to that bridge (XXX this is unimplemented!).
+ *
+ * Returns 0 on success, -EINVAL for an out-of-range bus/dev/func or when
+ * enable is zero (revoking is not supported), -ESRCH if the domain does not
+ * exist and -ENODEV if the device or the PCI root device cannot be found.
+ */
+int physdev_pci_access_modify(
+    domid_t dom, int bus, int dev, int func, int enable)
+{
+    struct task_struct *p;
+    struct pci_dev *pdev, *rdev, *tdev;
+
+    if ( !IS_PRIV(current) )
+        BUG();
+
+    if ( (bus < 0) || (bus > PCI_BUSMAX) ||
+         (dev < 0) || (dev > PCI_DEVMAX) ||
+         (func < 0) || (func > PCI_FUNCMAX) )
+        return -EINVAL;
+
+    if ( !enable )
+    {
+        DPRINTK("Disallowing access is not yet supported.\n");
+        return -EINVAL;
+    }
+
+    DPRINTK("physdev_pci_access_modify: %02x:%02x:%02x\n", bus, dev, func);
+
+    /*
+     * NOTE(review): if find_domain_by_id() takes a reference on the domain,
+     * it is never dropped on any path below -- confirm and release it.
+     */
+    if ( (p = find_domain_by_id(dom)) == NULL )
+        return -ESRCH;
+
+    /*
+     * Look both devices up before touching any state, so that a failed
+     * request does not leave the domain privileged or half set up.
+     */
+    if ( (pdev = pci_find_slot(bus, PCI_DEVFN(dev, func))) == NULL )
+    {
+        DPRINTK(" dev does not exist\n");
+        return -ENODEV;
+    }
+    if ( (rdev = pci_find_slot(0, PCI_DEVFN(0, 0))) == NULL )
+    {
+        DPRINTK(" bizarre -- no PCI root dev\n");
+        return -ENODEV;
+    }
+
+    /* Make the domain privileged. */
+    set_bit(PF_PRIVILEGED, &p->flags);
+
+    /* Grant write access to the specified device. */
+    add_dev_to_task(p, pdev, ACC_WRITE);
+    DPRINTK(" add RW %02x:%02x:%02x\n", pdev->bus->number,
+            PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+
+    /* Grant read access to the root device. */
+    add_dev_to_task(p, rdev, ACC_READ);
+    DPRINTK(" add R0 %02x:%02x:%02x\n", 0, 0, 0);
+
+    /* Grant read access to all devices on the path to the root. */
+    for ( tdev = pdev->bus->self; tdev != NULL; tdev = tdev->bus->self )
+    {
+        add_dev_to_task(p, tdev, ACC_READ);
+        DPRINTK(" add RO %02x:%02x:%02x\n", tdev->bus->number,
+                PCI_SLOT(tdev->devfn), PCI_FUNC(tdev->devfn));
+    }
+
+    if ( pdev->hdr_type == PCI_HEADER_TYPE_NORMAL )
+        return 0;
+
+    /* The device is a bridge or cardbus. */
+    printk("XXX can't give access to bridge devices yet\n");
+
+    return 0;
+}
+
+/*
+ * check if a domain has general access to a device.
+ * on success returns 0 and stores the domain's descriptor in *pdev;
+ * otherwise *pdev is NULL and the result is -EPERM (domain not privileged
+ * or device not on its list), -EINVAL (bus/dev/func out of range) or
+ * -ENODEV (no such device).
+ */
+static inline int check_dev_acc (struct task_struct *p,
+                                 int bus, int dev, int func,
+                                 phys_dev_t **pdev)
+{
+    struct pci_dev *target_dev;
+    phys_dev_t *target_pdev;
+    unsigned int target_devfn;
+
+    *pdev = NULL;
+
+    if ( !IS_PRIV(p) )
+        return -EPERM; /* no pci access permission */
+
+    if ( bus < 0 || bus > PCI_BUSMAX ||
+         dev < 0 || dev > PCI_DEVMAX ||
+         func < 0 || func > PCI_FUNCMAX )
+        return -EINVAL;
+
+    /* only names that exist in this function, so DBG can be enabled */
+    DBG("b=%x d=%x f=%x ", bus, dev, func);
+
+    /* check target device */
+    target_devfn = PCI_DEVFN(dev, func);
+    target_dev = pci_find_slot(bus, target_devfn);
+    if ( !target_dev )
+    {
+        DBG("target does not exist\n");
+        return -ENODEV;
+    }
+
+    /* check access */
+    target_pdev = find_pdev(p, target_dev);
+    if ( !target_pdev )
+    {
+        DBG("dom has no access to target\n");
+        return -EPERM;
+    }
+
+    *pdev = target_pdev;
+    return 0;
+}
+
+
+/*
+ * Base address registers contain the base address for IO regions.
+ * The length can be determined by writing all 1s to the register and
+ * reading the value again. The device will zero the lower unused bits.
+ *
+ * to work out the length of the io region a device probe typically does:
+ * 1) a = read_base_addr_reg()
+ * 2) write_base_addr_reg(0xffffffff)
+ * 3) b = read_base_addr_reg() // device zeros lower bits
+ * 4) write_base_addr_reg(a) // restore original value
+ * this function fakes out step 2-4. *no* writes are made to the device.
+ *
+ * phys_dev_t contains a bit field (a bit for each base address register).
+ * if the bit for a register is set the guest had written all 1s to the
+ * register and subsequent read request need to fake out the b.
+ * if the guest restores the original value (step 4 above) the bit is
+ * cleared again. If the guest attempts to "restore" a wrong value an
+ * error is flagged.
+ *
+ * acc is ACC_READ or ACC_WRITE; reg must be one of PCI_BASE_ADDRESS_0..5.
+ * returns 0 on success, -EPERM if the guest tries to write a new base
+ * address, the error from pci_config_read() if reading the real register
+ * fails, and -EINVAL for any other acc value.
+ */
+static int do_base_address_access(phys_dev_t *pdev, int acc,
+                                  int seg, int bus, int dev, int func,
+                                  int reg, int len, u32 *val)
+{
+    int idx, st_bit, ret = -EINVAL;
+    u32 orig_val, sz;
+    struct resource *res;
+
+    idx = (reg - PCI_BASE_ADDRESS_0)/4;
+    st_bit = idx + ST_BASE_ADDRESS;
+    res = &(pdev->dev->resource[idx]);
+
+    if ( acc == ACC_WRITE )
+    {
+        if ( *val == 0xffffffff ||
+             ((res->flags & IORESOURCE_IO) && *val == 0xffff) )
+        {
+            /* set bit and return */
+            set_bit(st_bit, &pdev->state);
+            ret = 0;
+        }
+        else
+        {
+            /* assume guest wants to set the base address */
+            clear_bit(st_bit, &pdev->state);
+
+            /* check if guest tries to restore orig value */
+            ret = pci_config_read(seg, bus, dev, func, reg, len, &orig_val);
+            /* orig_val is only meaningful if the read succeeded */
+            if ( (ret == 0) && (*val != orig_val) )
+            {
+                printk("caution: guest tried to change base address range.\n");
+                ret = -EPERM;
+            }
+        }
+        DBG("fixed pci write: %02x:%02x:%02x reg=0x%02x len=0x%02x"
+            " val=0x%08x %x\n", bus, dev, func, reg, len, *val,
+            pdev->state);
+
+    }
+
+    else if ( acc == ACC_READ )
+    {
+        if ( !test_bit(st_bit, &pdev->state) )
+        {
+            /* just read and return */
+            ret = pci_config_read(seg, bus, dev, func, reg, len, val);
+        }
+        else
+        {
+            /* fake value */
+            ret = pci_config_read(seg, bus, dev, func, reg, len, &orig_val);
+            if ( ret != 0 )
+                return ret; /* do not build a reply from a failed read */
+
+            /* end is inclusive, so this is the region size minus one */
+            sz = res->end - res->start;
+
+            if ( res->flags & IORESOURCE_MEM )
+            {
+                *val = 0xffffffff;
+                /* bit 0 = 0 */
+                /* bits 2-1 = memory type */
+                /* bit 3 = prefetchable */
+                /* bits 4-31 width */
+                sz = sz >> 4; /* size in blocks of 16 byte */
+                sz = ~sz; /* invert */
+                *val = *val & (sz << 4); /* and in the size */
+                /* use read values for low 4 bits */
+                *val = *val | (orig_val & 0xf);
+            }
+            else if ( res->flags & IORESOURCE_IO )
+            {
+                *val = 0x0000ffff;
+                /* bits 1-0 = 01 */
+                /* bits 2-31 width */
+                sz = sz >> 2; /* size in dwords */
+                sz = ~sz & 0x0000ffff;
+                *val = *val & (sz << 2);
+                *val = *val | 0x1;
+            }
+            /* neither flag set: *val is left as the caller passed it in */
+            ret = 0;
+        }
+        DBG("fixed pci read : %02x:%02x:%02x reg=0x%02x len=0x%02x"
+            " val=0x%08x %x\n", bus, dev, func, reg, len, *val, pdev->state);
+    }
+
+    return ret;
+}
+
+/*
+ * fake out read/write access to rom address register
+ * pretty much the same as above: a guest write of all 1s only sets
+ * ST_ROM_ADDRESS, the next read then returns a size mask built from the
+ * ROM resource. writing 0 (disable) is accepted but not forwarded to the
+ * device; any other new value is refused with -EPERM.
+ */
+static int do_rom_address_access(phys_dev_t *pdev, int acc,
+                                 int seg, int bus, int dev, int func,
+                                 int reg, int len, u32 *val)
+{
+    int st_bit, ret = -EINVAL;
+    u32 orig_val, sz;
+    struct resource *res;
+
+    st_bit = ST_ROM_ADDRESS;
+    res = &(pdev->dev->resource[PCI_ROM_RESOURCE]);
+
+    if ( acc == ACC_WRITE )
+    {
+        if ( *val == 0xffffffff || *val == 0xfffffffe)
+        {
+            /* 0xffffffff would be unusual, but we check anyway */
+            /* set bit and return */
+            set_bit(st_bit, &pdev->state);
+            ret = 0;
+        }
+        else
+        {
+            /* assume guest wants to set the base address */
+            clear_bit(st_bit, &pdev->state);
+
+            /* check if guest tries to restore orig value */
+            ret = pci_config_read(seg, bus, dev, func, reg, len, &orig_val);
+            /* orig_val is only meaningful if the read succeeded */
+            if ( (ret == 0) && (*val != orig_val) )
+            {
+                if (*val != 0x00000000 )
+                {
+                    printk("caution: guest tried to change rom address.\n");
+                    ret = -EPERM;
+                }
+                else
+                {
+                    printk ("guest disabled rom access for %02x:%02x:%02x\n",
+                            bus, dev, func);
+                    ret = 0;
+                }
+            }
+
+        }
+        DBG("fixed pci write: %02x:%02x:%02x reg=0x%02x len=0x%02x"
+            " val=0x%08x %x\n", bus, dev, func, reg, len, *val, pdev->state);
+
+    }
+    else if ( acc == ACC_READ )
+    {
+        if ( !test_bit(st_bit, &pdev->state) )
+        {
+            /* just read and return */
+            ret = pci_config_read(seg, bus, dev, func, reg, len, val);
+        }
+        else
+        {
+            /* fake value */
+            ret = pci_config_read(seg, bus, dev, func, reg, len, &orig_val);
+            if ( ret != 0 )
+                return ret; /* do not build a reply from a failed read */
+            /* end is inclusive, so this is the region size minus one */
+            sz = res->end - res->start;
+            *val = 0xffffffff;
+            /* leave bit 0 untouched */
+            /* bits 1-10 reserved, hardwired to 0 */
+            sz = sz >> 11; /* size is in 2KB blocks */
+            sz = ~sz;
+            *val = *val & (sz << 11);
+            *val = *val | (orig_val & 0x1);
+        }
+
+        DBG("fixed pci read : %02x:%02x:%02x reg=0x%02x len=0x%02x"
+            " val=0x%08x %x\n", bus, dev, func, reg, len, *val, pdev->state);
+    }
+    return ret;
+
+}
+
+/*
+ * handle a domains pci config space read
access if it has access to
+ * the device.
+ * For some registers for read-only devices (e.g. address base registers)
+ * we need to maintain a state machine.
+ *
+ * the result is stored in *val. returns the error from check_dev_acc() if
+ * the calling domain may not touch the device, -EINVAL if reg is outside
+ * 0..PCI_REGMAX, otherwise whatever the register handler or
+ * pci_config_read() returns.
+ */
+static long pci_cfgreg_read(int seg, int bus, int dev, int func, int reg,
+                            int len, u32 *val)
+{
+    int ret = 0;
+    phys_dev_t *pdev;
+
+    ret = check_dev_acc(current, bus, dev, func, &pdev);
+    if ( ret != 0 )
+        return ret;
+
+    /* reg is supplied by the guest: keep it inside config space */
+    if ( (reg < 0) || (reg > PCI_REGMAX) )
+        return -EINVAL;
+
+    /* fake out read requests for some registers */
+    switch (reg)
+    {
+    case PCI_BASE_ADDRESS_0:
+    case PCI_BASE_ADDRESS_1:
+    case PCI_BASE_ADDRESS_2:
+    case PCI_BASE_ADDRESS_3:
+    case PCI_BASE_ADDRESS_4:
+    case PCI_BASE_ADDRESS_5:
+        return do_base_address_access (pdev, ACC_READ, seg, bus, dev,
+                                       func, reg, len, val);
+    case PCI_ROM_ADDRESS:
+        return do_rom_address_access (pdev, ACC_READ, seg, bus, dev,
+                                      func, reg, len, val);
+    default:
+        break;
+    }
+
+    /* every other register is read straight from the device */
+    ret = pci_config_read(seg, bus, dev, func, reg, len, val);
+
+    DBG("pci read : %02x:%02x:%02x reg=0x%02x len=0x%02x val=0x%08x\n",
+        bus, dev, func, reg, len, *val);
+    return ret;
+}
+
+/*
+ * handle a domains pci config space write accesses if it has access to
+ * the device.
+ * for some registers a state machine is maintained to fake out r/w access.
+ * By default no write access is allowed but we may change that in the future.
+ *
+ * returns the error from check_dev_acc() if the calling domain may not
+ * touch the device, -EINVAL if reg is outside 0..PCI_REGMAX, the result of
+ * the base/rom address handlers for those registers and -EPERM for every
+ * other register.
+ */
+static long pci_cfgreg_write(int seg, int bus, int dev, int func, int reg,
+                             int len, u32 val)
+{
+    int ret = 0;
+    phys_dev_t *pdev;
+
+    ret = check_dev_acc(current, bus, dev, func, &pdev);
+    if ( ret != 0 )
+        return ret;
+
+    /* reg is supplied by the guest: keep it inside config space */
+    if ( (reg < 0) || (reg > PCI_REGMAX) )
+        return -EINVAL;
+
+    /* special treatment for some registers */
+    switch (reg)
+    {
+    case PCI_BASE_ADDRESS_0:
+    case PCI_BASE_ADDRESS_1:
+    case PCI_BASE_ADDRESS_2:
+    case PCI_BASE_ADDRESS_3:
+    case PCI_BASE_ADDRESS_4:
+    case PCI_BASE_ADDRESS_5:
+        return do_base_address_access (pdev, ACC_WRITE, seg, bus, dev,
+                                       func, reg, len, &val);
+    case PCI_ROM_ADDRESS:
+        return do_rom_address_access (pdev, ACC_WRITE, seg, bus, dev,
+                                      func, reg, len, &val);
+    default:
+        //if ( pdev->flags != ACC_WRITE )
+        /* XXX for debug we disallow all write access */
+        {
+            printk("pci write not allowed %02x:%02x:%02x: "
+                   "reg=0x%02x len=0x%02x val=0x%08x\n",
+                   bus, dev, func, reg, len, val);
+            return -EPERM;
+        }
+        break;
+    }
+
+    /*
+     * not reachable while the default case above returns unconditionally;
+     * kept for when the ACC_WRITE test is switched back on.
+     */
+    ret = pci_config_write(seg, bus, dev, func, reg, len, val);
+
+    DBG("pci write: %02x:%02x:%02x reg=0x%02x len=0x%02x val=0x%08x\n",
+        bus, dev, func, reg, len, val);
+    return ret;
+}
+
+
+/*
+ * return the IRQ xen assigned to the device.
+ * This may be different to what is in the PCI config space!
+ * XXX RN: I'm not sure we need this. we could just intercept PCI config
+ * reads on PCI_INTERRUPT_LINE and return the correct value.
+ *
+ * stores the irq in *val and returns 0, or returns the error from
+ * check_dev_acc() if the calling domain has no access to the device.
+ */
+static long pci_find_irq(int seg, int bus, int dev, int func, u32 *val)
+{
+    int ret = 0;
+    phys_dev_t *pdev;
+
+    ret = check_dev_acc(current, bus, dev, func, &pdev);
+    if ( ret != 0 )
+        return ret;
+
+    *val = pdev->dev->irq;
+    return 0;
+}
+
+/*
+ * interrupt handler registered by pci_request_irq(). dev_id is the
+ * phys_dev_t of the device. marks the irq pending in the owning domain's
+ * shared_info and notifies the domain.
+ */
+static void phys_dev_interrupt(int irq, void *dev_id, struct pt_regs *ptregs)
+{
+    phys_dev_t *pdev;
+    struct task_struct *p;
+    unsigned long cpu_mask = 0;
+
+    if ( !(pdev = (phys_dev_t *)dev_id) )
+    {
+        printk("spurious interrupt, no proper device id, %d\n", irq);
+        return;
+    }
+
+    //printk("irq %d pdev=%p\n", irq, pdev);
+
+    p = pdev->owner;
+
+    //printk("owner %p\n", p);
+
+    /* a device without an owner has nobody to deliver to */
+    if ( p == NULL )
+    {
+        printk("irq %d: device has no owning domain\n", irq);
+        return;
+    }
+
+    if ( test_bit(irq, &p->shared_info->virt_phys_irq) )
+    {
+        printk("irq %d already delivered to guest\n", irq);
+        return;
+    }
+    /* notify guest */
+    set_bit(irq, &p->shared_info->virt_phys_irq);
+    set_bit(ST_IRQ_DELIVERED, &pdev->state);
+    /* NOTE(review): raises _EVENT_TIMER for a device irq -- placeholder? */
+    cpu_mask |= mark_guest_event(p, _EVENT_TIMER);
+    guest_event_notify(cpu_mask);
+}
+
+/* this is called instead of the PICs original end handler.
+ * the real end handler is only called once the guest ack'ed the handling
+ * of the event.
*/
+static void end_virt_irq (unsigned int i)
+{
+    /* nothing */
+}
+
+/*
+ * a guest request an IRQ from a device to be routed to it
+ * - shared interrupts are not allowed for now
+ * - we change the hw_irq handler to something else
+ *
+ * returns 0 on success, -EINVAL if irq is out of range or no device on the
+ * calling domain's list uses it, -EPERM if the irq is already routed,
+ * -ENOMEM if the replacement controller cannot be allocated, or the error
+ * from request_irq().
+ */
+static long pci_request_irq(int irq)
+{
+    int err;
+    phys_dev_t *pdev = NULL, *t;
+    hw_irq_controller *new, *orig;
+    struct list_head *tmp;
+
+    printk("request irq %d\n", irq);
+
+    /* irq comes from the guest and indexes irqs[] and irq_desc[] below */
+    if ( (irq < 0) || (irq >= MAX_IRQS) )
+    {
+        printk("requested IRQ out of range %d\n", irq);
+        return -EINVAL;
+    }
+
+    /* find pdev */
+
+    list_for_each(tmp, &current->dev_list)
+    {
+        t = list_entry(tmp, phys_dev_t, node);
+        if ( t->dev->irq == irq )
+        {
+            pdev = t;
+            break;
+        }
+    }
+
+    if ( !pdev )
+    {
+        printk("no device matching IRQ %d\n", irq);
+        return -EINVAL;
+    }
+
+    printk("pdev= %p\n", pdev);
+
+    if ( irqs[irq] != NULL )
+    {
+        printk ("irq already in use %d\n", irq);
+        return -EPERM;
+    }
+
+    /* allocate a hw_irq controller and copy the original */
+    if ( !(new = kmalloc(sizeof(hw_irq_controller), GFP_KERNEL)) )
+    {
+        printk("error allocating new irq controller\n");
+        return -ENOMEM;
+    }
+    orig = irq_desc[irq].handler;
+    new->typename = orig->typename;
+    new->startup = orig->startup;
+    new->shutdown = orig->shutdown;
+    new->enable = orig->enable;
+    new->disable = orig->disable;
+    new->ack = orig->ack;
+    new->set_affinity = orig->set_affinity;
+
+    /* swap the end routine */
+    new->end = end_virt_irq;
+
+    /* change the irq controllers */
+    pdev->orig_handler = orig;
+    pdev->new_handler = new;
+    irq_desc[irq].handler = new;
+    irqs[irq] = pdev;
+
+    printk ("setup handler %d\n", irq);
+
+    /* request the IRQ. this is not shared! */
+    err = request_irq(irq, phys_dev_interrupt, 0, "network", (void *)pdev);
+    if ( err )
+    {
+        printk("error requesting irq\n");
+        /* restore original and forget the half-finished registration */
+        irq_desc[irq].handler = pdev->orig_handler;
+        irqs[irq] = NULL;
+        pdev->new_handler = NULL;
+        pdev->orig_handler = NULL;
+        /* free memory */
+        kfree(new);
+        return err;
+    }
+
+    printk ("done\n");
+
+    return 0;
+}
+
+/* release an irq again; not implemented yet, always returns 0 */
+static long pci_free_irq(int irq)
+{
+    /* XXX restore original handler and free_irq() */
+    return 0;
+}
+
+/* no-op for now, always returns 0 */
+static long pci_enable_irq(int irq)
+{
+    /* XXX not sure we need this */
+    /* guest can enable phys_irq event for now */
+    return 0;
+}
+
+/* no-op for now, always returns 0 */
+static long pci_disable_irq(int irq)
+{
+    /* XXX not sure we need this */
+    /* guest can disable phys_irq event for now */
+    return 0;
+}
+
+/*
+ * the guest has finished handling irq: call the end handler of the
+ * original irq controller.
+ * returns 0 on success, -EINVAL if irq is out of range, not registered,
+ * not delivered or still pending in shared_info, and -EPERM if the calling
+ * domain does not own the device.
+ */
+static long pci_finished_irq(int irq)
+{
+    phys_dev_t *pdev;
+
+    /* irq comes from the guest and indexes irqs[] */
+    if ( (irq < 0) || (irq >= MAX_IRQS) )
+    {
+        printk("finished_irq called for out of range irq %d\n", irq);
+        return -EINVAL;
+    }
+
+    if ( !(pdev = irqs[irq]) )
+    {
+        printk("finished_irq called for unregistered irq %d\n", irq);
+        return -EINVAL;
+    }
+
+    if ( pdev->owner != current )
+    {
+        printk("finished_irq called dom not owning irq %d\n", irq);
+        return -EPERM;
+    }
+
+    if ( !test_bit(ST_IRQ_DELIVERED, &pdev->state) )
+    {
+        printk("finished_irq called for undelivered irq %d\n", irq);
+        return -EINVAL;
+    }
+
+    if ( test_bit(irq, &current->shared_info->virt_phys_irq) )
+    {
+        printk("finished_irq called for un-acknowleged irq %d\n", irq);
+        return -EINVAL;
+    }
+
+    clear_bit(ST_IRQ_DELIVERED, &pdev->state);
+
+    /* call original end handler */
+    pdev->orig_handler->end(irq);
+
+    return 0;
+}
+
+/*
+ * demux hypervisor call.
+ *
+ * copies the request from uop, dispatches on op.cmd and copies the
+ * (possibly updated) request back. returns the handler's result, -EINVAL
+ * for an unknown command and -EFAULT if uop cannot be read, or cannot be
+ * written after an otherwise successful operation.
+ *
+ * NOTE(review): the hypercall table in entry.S (same patch) refers to
+ * do_physdev_op, not do_phys_dev_op -- confirm which name is intended.
+ */
+long do_phys_dev_op(phys_dev_op_t *uop)
+{
+    phys_dev_op_t op;
+    long ret;
+
+
+    if ( unlikely(copy_from_user(&op, uop, sizeof(op)) != 0) )
+        return -EFAULT;
+
+    switch ( op.cmd )
+    {
+    case DEVOP_CFGREG_READ:
+        ret = pci_cfgreg_read (op.u.cfg_read.seg, op.u.cfg_read.bus,
+                               op.u.cfg_read.dev, op.u.cfg_read.func,
+                               op.u.cfg_read.reg, op.u.cfg_read.len,
+                               &op.u.cfg_read.value);
+        break;
+
+    case DEVOP_CFGREG_WRITE:
+        ret = pci_cfgreg_write (op.u.cfg_write.seg, op.u.cfg_write.bus,
+                                op.u.cfg_write.dev, op.u.cfg_write.func,
+                                op.u.cfg_write.reg, op.u.cfg_write.len,
+                                op.u.cfg_write.value);
+        break;
+
+    case DEVOP_FIND_IRQ:
+        ret = pci_find_irq (op.u.find_irq.seg, op.u.find_irq.bus,
+                            op.u.find_irq.dev, op.u.find_irq.func,
+                            &op.u.find_irq.irq);
+        break;
+
+    case DEVOP_REQUEST_IRQ:
+        ret = pci_request_irq (op.u.request_irq.irq);
+        break;
+
+    case DEVOP_FREE_IRQ:
+        ret = pci_free_irq (op.u.free_irq.irq);
+        break;
+
+    case DEVOP_ENABLE_IRQ:
+        ret = pci_enable_irq (op.u.enable_irq.irq);
+        break;
+
+    case DEVOP_DISABLE_IRQ:
+        ret = pci_disable_irq (op.u.disable_irq.irq);
+        break;
+
+    case DEVOP_FINISHED_IRQ:
+        ret = pci_finished_irq (op.u.finished_irq.irq);
+        break;
+
+    default:
+        ret = -EINVAL;
+        break;
+    }
+
+    /* always copy back; a handler error takes precedence over -EFAULT */
+    if ( unlikely(copy_to_user(uop, &op, sizeof(op)) != 0) && (ret == 0) )
+        ret = -EFAULT;
+    return ret;
+}
+
+
+/*
+ * Domain 0 has read access to all devices.
+ * XXX this is a bit of a hack
+ *
+ * builds p's device list with one ACC_READ entry, owned by p, for every
+ * PCI device. stops early (after logging) if an allocation fails.
+ */
+void physdev_init_dom0(struct task_struct *p)
+{
+    struct pci_dev *dev;
+    phys_dev_t *pdev;
+
+    printk("Give Dom0 read access to all PCI devices\n");
+
+    INIT_LIST_HEAD(&p->dev_list);
+
+    pci_for_each_dev(dev)
+    {
+        /* add device */
+        pdev = kmalloc(sizeof(phys_dev_t), GFP_KERNEL);
+        if ( pdev == NULL )
+        {
+            printk("error allocating pdev structure\n");
+            return;
+        }
+        pdev->dev = dev;
+        pdev->flags = ACC_READ;
+        pdev->state = 0;
+        pdev->owner = p;
+        /* no irq routed yet */
+        pdev->new_handler = NULL;
+        pdev->orig_handler = NULL;
+        list_add(&pdev->node, &p->dev_list);
+    }
+}
diff --git a/xen/drivers/char/console.c b/xen/drivers/char/console.c index 370c449423..3a099ff108 100644 --- a/xen/drivers/char/console.c +++ b/xen/drivers/char/console.c @@ -264,7 +264,7 @@ static void serial_rx(unsigned char c, struct pt_regs *regs) } } -long do_serial_io(int cmd, int count, char *buffer) +long do_console_io(int cmd, int count, char *buffer) { char *kbuf; long rc; @@ -275,7 +275,7 @@ long do_serial_io(int cmd, int count, char *buffer) switch ( cmd ) { - case SERIALIO_write: + case CONSOLEIO_write: if ( count > (PAGE_SIZE-1) ) count = PAGE_SIZE-1; if ( (kbuf = (char *)get_free_page(GFP_KERNEL)) == NULL ) @@ -288,7 +288,7 @@ long do_serial_io(int cmd, int count, char *buffer) serial_puts(sercon_handle, kbuf); free_page((unsigned long)kbuf); break; - case SERIALIO_read: + case CONSOLEIO_read: rc = 0; while ( (serial_rx_cons != serial_rx_prod) && (rc < count) ) { diff --git a/xen/include/hypervisor-ifs/hypervisor-if.h b/xen/include/hypervisor-ifs/hypervisor-if.h index afba398828..80cc7b8aee 100644 --- a/xen/include/hypervisor-ifs/hypervisor-if.h +++ b/xen/include/hypervisor-ifs/hypervisor-if.h @@ -37,7 +37,7 @@ #define __HYPERVISOR_set_timer_op 20 #define __HYPERVISOR_event_channel_op 21 #define __HYPERVISOR_xen_version 22 -#define __HYPERVISOR_serial_io 23 +#define __HYPERVISOR_console_io 23 /* * MULTICALLS @@ -133,10 +133,10 @@ #define SCHEDOP_stop 4 /* Stop executing this domain. */ /* - * Commands to HYPERVISOR_serial_io(). + * Commands to HYPERVISOR_console_io().
*/ -#define SERIALIO_write 0 -#define SERIALIO_read 1 +#define CONSOLEIO_write 0 +#define CONSOLEIO_read 1 #ifndef __ASSEMBLY__ diff --git a/xenolinux-2.4.25-sparse/arch/xeno/drivers/console/console.c b/xenolinux-2.4.25-sparse/arch/xeno/drivers/console/console.c index f0c475200d..2e18d72854 100644 --- a/xenolinux-2.4.25-sparse/arch/xeno/drivers/console/console.c +++ b/xenolinux-2.4.25-sparse/arch/xeno/drivers/console/console.c @@ -79,7 +79,7 @@ static void priv_conwrite(const char *s, unsigned int count) while ( count > 0 ) { - if ( (rc = HYPERVISOR_serial_io(SERIALIO_write, count, s)) > 0 ) + if ( (rc = HYPERVISOR_console_io(CONSOLEIO_write, count, s)) > 0 ) { count -= rc; s += rc; @@ -174,7 +174,7 @@ static void __do_console_io(void) if ( start_info.flags & SIF_INITDOMAIN ) { /* Receive work. */ - while ( (len = HYPERVISOR_serial_io(SERIALIO_read, 16, rbuf)) > 0 ) + while ( (len = HYPERVISOR_console_io(CONSOLEIO_read, 16, rbuf)) > 0 ) for ( i = 0; i < len; i++ ) tty_insert_flip_char(xeno_console_tty, rbuf[i], 0); if ( xeno_console_tty->flip.count != 0 ) diff --git a/xenolinux-2.4.25-sparse/include/asm-xeno/hypervisor.h b/xenolinux-2.4.25-sparse/include/asm-xeno/hypervisor.h index c6959e107b..8a25957722 100644 --- a/xenolinux-2.4.25-sparse/include/asm-xeno/hypervisor.h +++ b/xenolinux-2.4.25-sparse/include/asm-xeno/hypervisor.h @@ -440,12 +440,12 @@ static inline int HYPERVISOR_xen_version(int cmd) return ret; } -static inline int HYPERVISOR_serial_io(int cmd, int count, char *str) +static inline int HYPERVISOR_console_io(int cmd, int count, char *str) { int ret; __asm__ __volatile__ ( TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_serial_io), + : "=a" (ret) : "0" (__HYPERVISOR_console_io), "b" (cmd), "c" (count), "d" (str) : "memory" ); return ret; -- 2.30.2